library(ggplot2)
require(data.table)
library("dplyr")
library("ggpubr") 
require(tidyverse) #for easy data manipulation and visualization
require(caret) #for easy machine learning workflow
library(ncdf4)
library(raster)
library("forcats")

#change to full local path to the replication data
wd <- "C:\\Users\\YFan\\OneDrive - NORCE\\NFood_replication\\"

crops = c("Corn", "Wheat", "Soybean", "Cotton", "Rice", "Sugarcane", "Tropical Corn", "Tropical Soybean")
pft = c(17,18,  19,20,   23,24,    41,42,    61,62,   67,68,   75,76, 77,78)#rainfed vs. irrigated
case = c("RCP45","RCP45_85LU")
casename = c("RCP45",expression('RCP45'[(SSP5LU)]))


#before cropping with box, rotate all rasters from 0 to 360 to standard coordinates between -180 and 180 degrees

#new regions consistent with Fig.1
region = c("NAM", "SEA", "EUA", "SAM", "CAM", "SAF", "Global")
box_n = c(60,   55,  60,   0,  30,  15, 90) #larger box for SEA and SAF
box_s = c(30,  -10,  36, -40,   0, -30, -90)
box_w = c(-130, 70, -10, -80,-110,  -12, -180) #do not change lon to 0:360, because it is difficult to clip SAF and EUA which cross lon=0 
box_e = c(-70, 130,  70, -35, -50,  50, 180) #re-organize the conditional matrix and sample matrix instead 


###======get climate and soil variables

#regional masking for all varaibles per crop
#df.tsa1.all<-df.tsa2.all<-df.ppt1.all<-df.ppt2.all<-df.sminn1.all<-df.sminn2.all<-c()
cft = c(17,18, 19,20, 23,24, 41,42, 61,62, 67,68, 75,76, 77,78)-15+1 #pft for active crops
#cft1 = c(17, 19, 23, 41, 61, 67, 75, 77)-15+1 #pft index for active rainfed crops
#cft2 = c(18, 20, 24, 42, 62, 68, 76, 78)-15+1 #pft index for active irrigated crops

df.all<-df2.all<-df3.all<-c()
for (n in 1:6) { #there are 16 cft levels in the raster brick; read and merge 8 to 6 crop types (irrigated vs. rainfed)
  print(paste0("now process crop: ",crops[n]))
  
  ncft1 = seq(1,15,2)[n]  #rainfed
  ncft2 = seq(2,16,2)[n]  #irrigated
  #use brick to read nc as a raster, lvar=3 indicate cft as the level dimension, level=1 shows the first crop
  #PROD11/12 indicate rainfed/irrigated for SSP2 (RCP45), PROD21/22 indicate rainfed/irrigated for SSP5 (RCP45_85LU)
  PROD11 <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft1)
  PROD12 <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft2) #merge rainfed+irrigated
  PROD21 <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft1)
  PROD22 <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft2)

  AREA11 <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft1)
  AREA12 <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft2) 
  AREA21 <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft1)
  AREA22 <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft2)

  NAvalue(AREA11) #check NA=-Inf (this is just default value when _FillValue is not set in the raster)
  AREA11@file@nodatavalue #-Inf is wrong -> actual missing value is 9.969210e+36
  #have to set 9.9692e+36 as NA (ocean), so that area weighted averaging is correct
  AREA11[AREA11>=9.9692e+36] <- NA 
  AREA12[AREA12>=9.9692e+36] <- NA
  AREA21[AREA21>=9.9692e+36] <- NA 
  AREA22[AREA22>=9.9692e+36] <- NA
  PROD11[PROD11>=9.9692e+36] <- NA
  PROD12[PROD12>=9.9692e+36] <- NA  
  PROD21[PROD21>=9.9692e+36] <- NA
  PROD22[PROD22>=9.9692e+36] <- NA 
  
  # ncft1 = cft1[n]  #cft index in the surface data
  # ncft2 = cft2[n]  #irrigated
  ncft11 = cft[ncft1] #cft index in the surface data
  ncft22 = cft[ncft2]
  ##NOTE: unzip the FERTNITRO files under folder /rawdata/FERTNITRO_CFT_SSP2-4.5_SSP5-8.5.zip to /rawdata/*
  FERTN11 <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP2-4.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft11)
  FERTN12 <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP2-4.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft22)
  FERTN21 <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP5-8.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft11)
  FERTN22 <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP5-8.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft22)
  FERTN11 <- subset(FERTN11,157:251) #take years 2006-2100
  FERTN12 <- subset(FERTN12,157:251)
  FERTN21 <- subset(FERTN21,157:251)
  FERTN22 <- subset(FERTN22,157:251)
  FERTN11@extent <- AREA11@extent #surface data does not have correct coordinates
  FERTN12@extent <- AREA12@extent #copy frm AREA (same resolution 192x288)
  FERTN21@extent <- AREA21@extent
  FERTN22@extent <- AREA22@extent
  FERTN11[is.na(AREA11)] <- NA  #set NA for fertn data
  FERTN12[is.na(AREA12)] <- NA
  FERTN21[is.na(AREA21)] <- NA
  FERTN22[is.na(AREA22)] <- NA
  
  # Below method will cause NA values for those grid cells with either temperate or tropical cultivar (when not both)
  #merge temperate and tropical soy and corn after aggregation (weighted mean) to regional or global
  if (crops[n]=="Corn" | crops[n]=="Soybean") {
    ncft1 <- seq(1,15,2)[crops==paste0("Tropical ",crops[n])] # rainfed Tropical Corn or Tropical Soybean
    ncft2 <- seq(2,16,2)[crops==paste0("Tropical ",crops[n])] # irrigated Tropical Corn or Tropical Soybean
    PROD11.trop <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft1) #merge tropical cultivars
    PROD12.trop <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft2)
    PROD21.trop <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft1)
    PROD22.trop <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_PROD", lvar=3, level=ncft2)

    AREA11.trop <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft1)
    AREA12.trop <- brick(paste0(wd, "/data/yield_data/RCP45_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft2)
    AREA21.trop <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft1)
    AREA22.trop <- brick(paste0(wd, "/data/yield_data/RCP45_85LU_crop_data.2006-2100.nc"), varname="CROP_AREA", lvar=3, level=ncft2)

    #set 9.9692e+36 as NA (for ocean grid cells).
    AREA11.trop[AREA11.trop>=9.9692e+36] <- NA
    AREA12.trop[AREA12.trop>=9.9692e+36] <- NA
    AREA21.trop[AREA21.trop>=9.9692e+36] <- NA
    AREA22.trop[AREA22.trop>=9.9692e+36] <- NA
    PROD11.trop[PROD11.trop>=9.9692e+36] <- NA
    PROD12.trop[PROD12.trop>=9.9692e+36] <- NA
    PROD21.trop[PROD21.trop>=9.9692e+36] <- NA
    PROD22.trop[PROD22.trop>=9.9692e+36] <- NA

    ncft11 = cft[ncft1] #get cft index in the surface data
    ncft22 = cft[ncft2]
    #nitrogen fertilizer for each crop is "gN/m2/yr", should be weight averaged across crop types
    FERTN11.trop <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP2-4.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft11)
    FERTN12.trop <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP2-4.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft22)
    FERTN21.trop <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP5-8.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft11)
    FERTN22.trop <- brick((paste0(wd, "/data/yield_data/rawdata/FERTNITRO_CFT_SSP5-8.5_1deg_1850-2100.nc")), "FERTNITRO_CFT", lvar=3, level=ncft22)
    FERTN11.trop <- subset(FERTN11.trop,157:251) #take years 2006-2100
    FERTN12.trop <- subset(FERTN12.trop,157:251)
    FERTN21.trop <- subset(FERTN21.trop,157:251)
    FERTN22.trop <- subset(FERTN22.trop,157:251)
    FERTN11.trop@extent <- AREA11.trop@extent #surface data does not have correct coordinates
    FERTN12.trop@extent <- AREA12.trop@extent #copy frm AREA, otherwise error "different resolution"
    FERTN21.trop@extent <- AREA21.trop@extent
    FERTN22.trop@extent <- AREA22.trop@extent
    FERTN11.trop[is.na(AREA11.trop)] <- NA
    FERTN12.trop[is.na(AREA12.trop)] <- NA
    FERTN21.trop[is.na(AREA21.trop)] <- NA
    FERTN22.trop[is.na(AREA22.trop)] <- NA
    
    plot(raster(FERTN11, layer=95)) #tropical and temperate cultivars do not overlap
    plot(raster(FERTN11.trop, layer=95))
    plot(raster(AREA11, layer=95))
    plot(raster(AREA11.trop, layer=95)) 
    FERTN11 <- FERTN11 + FERTN11.trop 
    FERTN12 <- FERTN12 + FERTN12.trop #this method gives consistent fertn as GridSample-SSP2-SSP5-Data-final.Rdata
    FERTN21 <- FERTN21 + FERTN21.trop
    FERTN22 <- FERTN22 + FERTN22.trop
    #Below method gives lower mean fertn for corn and soybean (error when calc fertn1.w <- overlay(...))
    #tropical and temperate cultivars do not overlap, no need to do area weighted average at each grid cell
    # FERTN11 <- (FERTN11.trop*AREA11.trop + FERTN11*AREA11)/(AREA11 + AREA11.trop)
    # FERTN12 <- (FERTN12.trop*AREA12.trop + FERTN12*AREA12)/(AREA12 + AREA12.trop)
    # FERTN21 <- (FERTN21.trop*AREA21.trop + FERTN21*AREA21)/(AREA21 + AREA21.trop)
    # FERTN22 <- (FERTN22.trop*AREA22.trop + FERTN22*AREA22)/(AREA22 + AREA22.trop)
    plot(raster(FERTN11, layer=95))
  
    AREA11 <- AREA11+AREA11.trop
    AREA12 <- AREA12+AREA12.trop
    AREA21 <- AREA21+AREA21.trop
    AREA22 <- AREA22+AREA22.trop
    PROD11 <- PROD11+PROD11.trop
    PROD12 <- PROD12+PROD12.trop
    PROD21 <- PROD21+PROD21.trop
    PROD22 <- PROD22+PROD22.trop  

  }

  #===merge rainfed+irrigated====
  PROD1 <- PROD11+PROD12
  PROD2 <- PROD21+PROD22
  AREA1 <- AREA11+AREA12
  AREA2 <- AREA21+AREA22

  FERTN1 <- (FERTN11*AREA11+FERTN12*AREA12)/AREA1
  FERTN2 <- (FERTN21*AREA21+FERTN22*AREA22)/AREA2

  
  YIELD11 <- PROD11/AREA11 
  YIELD12 <- PROD12/AREA12
  YIELD21 <- PROD21/AREA21 
  YIELD22 <- PROD22/AREA22
  YIELD1 <- PROD1/AREA1 
  YIELD2 <- PROD2/AREA2 #unit: MtC/ha

  #IRRIG1 <- AREA12/AREA1*100 #irrigated %
  #IRRIG2 <- AREA22/AREA2*100
  
  nlayers(AREA1) #years as layer
  nrow(AREA1);ncol(AREA1) 
  NAvalue(AREA1) #check NA=1e+30 (this is just default value when _FillValue is not set in the raster)
  AREA1@file@nodatavalue #1e+30 is wrong -> actual missing value is 9.969210e+36
  #NAvalue(AREA1) <- 9.96921e+36 #not useful; set NA >=9.9692e+36 instead
  AREA1[AREA1>=9.9692e+36] <- NA #now it works, check plot
  AREA2[AREA2>=9.9692e+36] <- NA
  PROD1[PROD1>=9.9692e+36] <- NA
  PROD2[PROD2>=9.9692e+36] <- NA
  YIELD1[YIELD1>=9.9692e+36] <- NA 
  YIELD2[YIELD2>=9.9692e+36] <- NA
  #verify if NA is set correctly
  head(raster(AREA1, layer=95)[,]) #9.96921e+36
 
  #for (n in 1:6) { #6 main cfts
  #==================read annual climate and soil data
  tsa1 <- brick((paste0(wd, "/data/climate_data/TSA.RCP45.1deg.clm5.h0.2006-2100-annual.nc")), "TSA")
  tsa2 <- brick((paste0(wd, "/data/climate_data/TSA.RCP45_85LU.1deg.clm5.h0.2006-2100-annual.nc")), "TSA")
  NAvalue(tsa1) #NA value is correctly set for climate variables
  #str(tsa1@data) 
  #plot(raster(tsa1,95))

  rain1 <- brick((paste0(wd, "/data/climate_data/RAIN.RCP45.1deg.clm5.h0.2006-2100-annual.nc")), "RAIN")
  snow1 <- brick((paste0(wd, "/data/climate_data/SNOW.RCP45.1deg.clm5.h0.2006-2100-annual.nc")), "SNOW")
  rain2 <- brick((paste0(wd, "/data/climate_data/RAIN.RCP45_85LU.1deg.clm5.h0.2006-2100-annual.nc")), "RAIN") 
  snow2 <- brick((paste0(wd, "/data/climate_data/SNOW.RCP45_85LU.1deg.clm5.h0.2006-2100-annual.nc")), "SNOW")
  
  rh1 <- brick((paste0(wd, "/data/climate_data/RH2M.RCP45.1deg.clm5.h0.2006-2100-annual.nc")), varname="RH2M") #surface (2m) climate variables
  rh2 <- brick((paste0(wd, "/data/climate_data/RH2M.RCP45_85LU.1deg.clm5.h0.2006-2100-annual.nc")), varname="RH2M") #specify variable
  
  sminn1 <- brick((paste0(wd, "/data/climate_data/SMINN.RCP45.1deg.clm5.h0.2006-2100-annual.nc")), "SMINN")
  sminn2 <- brick((paste0(wd, "/data/climate_data/SMINN.RCP45_85LU.1deg.clm5.h0.2006-2100-annual.nc")), "SMINN")
  
  #======aggregate monthly to annual for climate and soil data
  # str(tsa1@data) #95x12 monthly data
  # mondates <- seq(as.Date("2006-01-01"), as.Date("2100-12-31"), by="month")  
  # years <- format(mondates, "%Y") #or years=rep(2006:2100, each=12)
  # month2years <- function(x) {  
  #   agg <- aggregate(x, by=list(years), mean)  
  #   return(agg$x)  
  # } 
  # tsa1.a<- calc(tsa1, month2years) #very slow!
  #TOO slow; use NCO ncra function to convert monthly files to annual!
  #ncra --mro -O -d time,,,12,12 SMINN.RCP85.1deg.clm5.h0.2006-2100.nc SMINN.RCP85.1deg.clm5.h0.2006-2100-annual.nc
  
  sand <- brick(paste0(wd, "/data/land_cover/PCT_SAND.1deg.clm5.surfdata.nc"), "PCT_SAND") #10 soil layers
  clay <- brick(paste0(wd, "/data/land_cover/PCT_CLAY.1deg.clm5.surfdata.nc"), "PCT_CLAY")
  sand@extent <- AREA1@extent #sand and clay does not have correct coordinates
  clay@extent <- AREA1@extent #copy frm AREA
  #plot(raster(sand,1))
  NAvalue(sand) #NA=43 for sand/clay, does not matter

  
  df.reg <-df2.reg<-df3.reg<-c()
  for (reg in 1:7){ #6 regions + global
    #regional box for clip
    if (reg<7) {
      box <- extent(box_w[reg], box_e[reg], box_s[reg], box_n[reg])
    } else{box <- extent(-179,181,-90.5,90.5)} #global cordinates after rotate
    
    #rotate all rasters from 0 to 360, to -180 and 180 before crop()
    area11.reg <- crop(raster::rotate(AREA11), box) #RCP45
    area12.reg <- crop(raster::rotate(AREA12), box) #irrig
    area21.reg <- crop(raster::rotate(AREA21), box) #rainfed
    area22.reg <- crop(raster::rotate(AREA22), box) #RCP45_85LU
    yield11.reg <- crop(raster::rotate(YIELD11), box) #RCP45
    yield12.reg <- crop(raster::rotate(YIELD12), box) #irrig
    yield21.reg <- crop(raster::rotate(YIELD21), box) #rainfed
    yield22.reg <- crop(raster::rotate(YIELD22), box) #RCP45_85LU
    
    area1.reg <- crop(raster::rotate(AREA1), box) #RCP45
    area2.reg <- crop(raster::rotate(AREA2), box) #RCP45_85LU
    #irrig1.reg <- crop(raster::rotate(IRRIG1), box) #RCP45
    #irrig2.reg <- crop(raster::rotate(IRRIG2), box) #RCP45_85LU
    prod1.reg <- crop(raster::rotate(PROD1), box) #RCP45
    prod2.reg <- crop(raster::rotate(PROD2), box) #RCP45_85LU
    yield1.reg <- crop(raster::rotate(YIELD1), box) #RCP45
    yield2.reg <- crop(raster::rotate(YIELD2), box) #RCP45_85LU
    fertn1.reg <- crop(raster::rotate(FERTN1), box) #RCP45
    fertn2.reg <- crop(raster::rotate(FERTN2), box) #RCP45_85LU

    #========analyze the difference in weighted climate trends between different land use scenarios
    # yield.present <- calc(subset(yield1.reg, 1:10), fun=mean, na.rm=TRUE) #2006-2015 CNTL yield
    # yield1.anomaly <- yield1.reg - yield.present 
    # yield2.anomaly <- yield2.reg - yield.present

    #====the difference in unit yield is due to climate and soil N   
    tsa1.reg <- crop(raster::rotate(tsa1), box)-273.15
    tsa2.reg <- crop(raster::rotate(tsa2), box)-273.15
    #tsa1.dif <- (tsa1.reg - tsa2.reg)/tsa2.reg*100 #zero change in climate but change in crop weight
    plot(raster(tsa1.reg,94))
    #===Climate anomaly is not the point of interest for land use analysis; analyze the absolute climate difference caused by LUC
    # tsa.present <- calc(subset(tsa1.reg, 1:10), fun=mean, na.rm=TRUE) #2006-2015 mean (better use HIST 1986-2005)
    # tsa1.anomaly <- (tsa1.reg - tsa.present)
    # tsa2.anomaly <- (tsa2.reg - tsa.present)
    rh1.reg <- crop(raster::rotate(rh1), box)
    rh2.reg <- crop(raster::rotate(rh2), box)
    
    ppt1.reg <- (crop(raster::rotate(rain1), box) + crop(raster::rotate(snow1), box))*365*3600*24  #mm/s to mm/year
    ppt2.reg <- (crop(raster::rotate(rain2), box) + crop(raster::rotate(snow2), box))*365*3600*24
    # ppt.present <- calc(subset(ppt1.reg, 1:10), fun=mean, na.rm=TRUE) #2006-2015 mean (better use HIST 1986-2005)
    # ppt1.anomaly <- (ppt1.reg - ppt.present)
    # ppt2.anomaly <- (ppt2.reg - ppt.present)
 
    sminn1.reg <- crop(raster::rotate(sminn1), box)
    sminn2.reg <- crop(raster::rotate(sminn2), box)
    # sminn.present <- calc(subset(sminn1.reg, 1:10), fun=mean, na.rm=TRUE) #2006-2015 mean (better use HIST 1986-2005)
    # sminn1.anomaly <- (sminn1.reg - sminn.present)
    # sminn2.anomaly <- (sminn2.reg - sminn.present)
   
    sand.reg <- crop(raster::rotate(sand), box)
    clay.reg <- crop(raster::rotate(clay), box)
    sand.1m <- calc(subset(sand.reg, 1:8), fun=mean, na.rm=TRUE) #Top 1m mean soil texture
    clay.1m <- calc(subset(clay.reg, 1:8), fun=mean, na.rm=TRUE)
    
    #==========calculate crop area weighted average yield and climate per year    
    #crop area weighted-intermediate use 
    area11.s <- cellStats(area11.reg, stat='sum', na.rm=TRUE)
    area12.s <- cellStats(area12.reg, stat='sum', na.rm=TRUE)
    area21.s <- cellStats(area21.reg, stat='sum', na.rm=TRUE)
    area22.s <- cellStats(area22.reg, stat='sum', na.rm=TRUE)
    area1.s <- cellStats(area1.reg, stat='sum', na.rm=TRUE) #total crop area per year/layer
    area2.s <- cellStats(area2.reg, stat='sum', na.rm=TRUE) #do not use sum(y,na.rm = T), which sums over years per grid cell
    prod1.s <- cellStats(prod1.reg, stat='sum', na.rm=TRUE) #total crop production per year/layer
    prod2.s <- cellStats(prod2.reg, stat='sum', na.rm=TRUE) 
    
    yield1.w <- overlay(yield1.reg, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    yield2.w <- overlay(yield2.reg, area2.reg, fun=function(x,y){(x*y)})
    yield11.w <- overlay(yield11.reg, area11.reg, fun=function(x,y){(x*y)}) #rainfed
    yield12.w <- overlay(yield12.reg, area12.reg, fun=function(x,y){(x*y)}) #irrig
    yield21.w <- overlay(yield21.reg, area21.reg, fun=function(x,y){(x*y)}) #rainfed
    yield22.w <- overlay(yield22.reg, area22.reg, fun=function(x,y){(x*y)}) #irrig
    
    tsa1.w <- overlay(tsa1.reg, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    tsa2.w <- overlay(tsa2.reg, area2.reg, fun=function(x,y){(x*y)})
    rh1.w <- overlay(rh1.reg, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    rh2.w <- overlay(rh2.reg, area2.reg, fun=function(x,y){(x*y)})
    ppt1.w <- overlay(ppt1.reg, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    ppt2.w <- overlay(ppt2.reg, area2.reg, fun=function(x,y){(x*y)})
    fertn1.w <- overlay(fertn1.reg, area1.reg, fun=function(x,y){(x*y)}) #FERTN is a more direct driver of soil nutrient status
    fertn2.w <- overlay(fertn2.reg, area2.reg, fun=function(x,y){(x*y)})
    sminn1.w <- overlay(sminn1.reg, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    sminn2.w <- overlay(sminn2.reg, area2.reg, fun=function(x,y){(x*y)})
   #cellStats(sminn1.reg, stat='mean', na.rm=TRUE) #original region mean per year
    sand1.w <- overlay(sand.1m, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    sand2.w <- overlay(sand.1m, area2.reg, fun=function(x,y){(x*y)}) #although sand is static, area wt is changing
    clay1.w <- overlay(clay.1m, area1.reg, fun=function(x,y){(x*y)}) #return 95 layers
    clay2.w <- overlay(clay.1m, area2.reg, fun=function(x,y){(x*y)})
   
    #====area weighted average (for correlation analysis, e.g. yield1.wm~tsa1.wm+sminn1.wm)
    yield1.wm <- cellStats(yield1.w, stat='sum', na.rm=TRUE)/area1.s #this gives area weighted mean value per year 
    yield2.wm <- cellStats(yield2.w, stat='sum', na.rm=TRUE)/area2.s 
    #plot(raster(yield1.wm,94))
    yield11.wm <- cellStats(yield11.w, stat='sum', na.rm=TRUE)/area11.s #rainfed
    yield12.wm <- cellStats(yield12.w, stat='sum', na.rm=TRUE)/area12.s #irrigated
    yield21.wm <- cellStats(yield21.w, stat='sum', na.rm=TRUE)/area21.s 
    yield22.wm <- cellStats(yield22.w, stat='sum', na.rm=TRUE)/area22.s
    
    #[AreaWeighted Avg] =  SUM([Temperature]*[Area])/SUM([Area])
    tsa1.wm <- cellStats(tsa1.w, stat='sum', na.rm=TRUE)/area1.s #weighted mean
    tsa2.wm <- cellStats(tsa2.w, stat='sum', na.rm=TRUE)/area2.s 
    rh1.wm <- cellStats(rh1.w, stat='sum', na.rm=TRUE)/area1.s #weighted mean
    rh2.wm <- cellStats(rh2.w, stat='sum', na.rm=TRUE)/area2.s 
    ppt1.wm <- cellStats(ppt1.w, stat='sum', na.rm=TRUE)/area1.s #weighted mean
    ppt2.wm <- cellStats(ppt2.w, stat='sum', na.rm=TRUE)/area2.s 
    fertn1.wm <- cellStats(fertn1.w, stat='sum', na.rm=TRUE)/area1.s #weighted mean
    fertn2.wm <- cellStats(fertn2.w, stat='sum', na.rm=TRUE)/area2.s 
    irrig1.wm <- area12.s/area1.s*100 #weighted mean
    irrig2.wm <- area22.s/area2.s*100 #irrigated %

    sminn1.wm <- cellStats(sminn1.w, stat='sum', na.rm=TRUE)/area1.s #weighted mean
    sminn2.wm <- cellStats(sminn2.w, stat='sum', na.rm=TRUE)/area2.s 
    sand1.wm <- cellStats(sand1.w, stat='sum', na.rm=TRUE)/area1.s #soil texture changes little with crop redistribution
    sand2.wm <- cellStats(sand2.w, stat='sum', na.rm=TRUE)/area2.s 
    clay1.wm <- cellStats(clay1.w, stat='sum', na.rm=TRUE)/area1.s #little difference
    clay2.wm <- cellStats(clay2.w, stat='sum', na.rm=TRUE)/area2.s 
    
    #=======crop area weighted percentiles of anomaly versus control (for distribution of anomaly with reference to current state)

    #install.packages("reldist") #package for calculating area-weighted quantiles
    #library(reldist)
    #citation("reldist") #cite this as reference
    #Mark S. Handcock (2016), Relative Distribution Methods. Version 1.6-6. Project home page at
    #http://www.stat.ucla.edu/~handcock/RelDist. URL https://CRAN.R-project.org/package=reldist.
    #tsa.wq1 <- wtd.quantile (tsa1.reg, q=0.5, na.rm = TRUE, weight=area1.reg/area1.s) # does not work for raster stack
    #detach("package:reldist", unload=TRUE)
    
    library(Hmisc) 
    #wq <- wtd.quantile(tsa1.reg@data@values, weight=wt1@data@values, probs=c(0.05, .25, .5, .75, 0.95), na.rm=TRUE)
    #original wtd.quantile function only works on vectors
    wtq.stack <- function(x,y){ #wtd.quantile function for stack of rasters
      wqts <- c()
      for (n in 1:nlayers(y)) {
        if (nlayers(x)==1){x.layer <- as.vector(x)} else { #for static sand/clay raster
        x.layer <- as.vector(subset(x,n))}
        y.layer <- as.vector(subset(y,n))
        #x.layer[y.layer==0] <- NA #0s and NAs will both affect quntiles if normwt=FALSE
        #y.layer[y.layer==0] <- NA #remove grid cells with 0 crop (no need when set normwt=TRUE)
        wtd.qt <- wtd.quantile(x.layer, weights=y.layer, probs=c(.01,.05, .25, .5, .75,.95,.99), normwt=TRUE, na.rm=TRUE)
        wqts <- rbind(wqts,wtd.qt)
      }
      return(wqts)
    }
    wtq.ave <- function(x,y,z1,z2){ #wtd.quantile function for an averaging period z
      yr1 <- z1-2006+1
      yr2 <- z2-2006+1
      x.m <- as.vector(calc(subset(x, yr1:yr2), fun=mean, na.rm=TRUE))
      y.m <- as.vector(calc(subset(y, yr1:yr2), fun=mean, na.rm=TRUE))
      wtd.qt <- wtd.quantile(x.m, weights=y.m, probs=c(.01,.05, .25, .5, .75,.95,.99), normwt=TRUE, na.rm=TRUE)
      return(wtd.qt)
    }

    wt1 <- area1.reg/area1.s #cellStats(wt1, stat='sum', na.rm=TRUE) #all wt sum to unity 1
    wt2 <- area2.reg/area2.s 

    yield1.wqs <- wtq.stack(yield1.reg, wt1) #RCP45 2006:2100 time series
    yield2.wqs <- wtq.stack(yield2.reg, wt2) #RCP45_85LU
    yield1.wq2075s <- wtq.ave(yield1.reg, wt1, 2075, 2099) #time slice
    yield1.wq2090s <- wtq.ave(yield1.reg, wt1, 2090, 2099)
    yield2.wq2075s <- wtq.ave(yield2.reg, wt2, 2075, 2099)
    yield2.wq2090s <- wtq.ave(yield2.reg, wt2, 2090, 2099)


    tsa1.wqs <- wtq.stack(tsa1.reg, wt1) #RCP45 2006:2100
    tsa2.wqs <- wtq.stack(tsa2.reg, wt2) #RCP85
    #=======Difference in climate anomaly (versus 2006-2015 average) weighted by crop area between SSP2 & SSP5
    #=======is much smaller than difference in crop weighted climate trends between SSP2 & SSP5; 
    #=======climate anomaly is not the point of interest for land use analysis; analyze the absolute climate difference caused by LUC
    # tsa1.wq2050s <- wtq.ave(tsa1.anomaly, wt1, 2050, 2059)
    # tsa1.wq2090s <- wtq.ave(tsa1.anomaly, wt1, 2090, 2099)
    # #qt <- quantile(tsa1.reg, probs=c(0.05, .25, .5, .75, 0.95), na.rm=TRUE) #compare with normal quantile
    # tsa2.wq2050s <- wtq.ave(tsa2.anomaly, wt2, 2050, 2059)
    # tsa2.wq2090s <- wtq.ave(tsa2.anomaly, wt2, 2090, 2099)
    #=======
    tsa1.wq2075s <- wtq.ave(tsa1.reg, wt1, 2075, 2099)
    tsa1.wq2090s <- wtq.ave(tsa1.reg, wt1, 2090, 2099)
    #qt <- quantile(tsa1.reg, probs=c(0.05, .25, .5, .75, 0.95), na.rm=TRUE) #compare with normal quantile
    tsa2.wq2075s <- wtq.ave(tsa2.reg, wt2, 2075, 2099)
    tsa2.wq2090s <- wtq.ave(tsa2.reg, wt2, 2090, 2099)

    rh1.wqs <- wtq.stack(rh1.reg, wt1) #RCP45 2006:2100
    rh2.wqs <- wtq.stack(rh2.reg, wt2) #RCP85
    rh1.wq2075s <- wtq.ave(rh1.reg, wt1, 2075, 2099)
    rh1.wq2090s <- wtq.ave(rh1.reg, wt1, 2090, 2099)
    rh2.wq2075s <- wtq.ave(rh2.reg, wt2, 2075, 2099)
    rh2.wq2090s <- wtq.ave(rh2.reg, wt2, 2090, 2099)

    ppt1.wqs <- wtq.stack(ppt1.reg, wt1) #RCP45 2006:2100
    ppt2.wqs <- wtq.stack(ppt2.reg, wt2) #RCP45_85LU
    ppt1.wq2075s <- wtq.ave(ppt1.reg, wt1, 2075, 2099)
    ppt1.wq2090s <- wtq.ave(ppt1.reg, wt1, 2090, 2099)
    ppt2.wq2075s <- wtq.ave(ppt2.reg, wt2, 2075, 2099)
    ppt2.wq2090s <- wtq.ave(ppt2.reg, wt2, 2090, 2099)
    
    fertn1.wqs <- wtq.stack(fertn1.reg, wt1) #RCP45 2006:2100
    fertn2.wqs <- wtq.stack(fertn2.reg, wt2) #RCP45_85LU
    fertn1.wq2075s <- wtq.ave(fertn1.reg, wt1, 2075, 2099)
    fertn1.wq2090s <- wtq.ave(fertn1.reg, wt1, 2090, 2099)
    fertn2.wq2075s <- wtq.ave(fertn2.reg, wt2, 2075, 2099)
    fertn2.wq2090s <- wtq.ave(fertn2.reg, wt2, 2090, 2099)
    
    sminn1.wqs <- wtq.stack(sminn1.reg, wt1) #RCP45 2006:2100
    sminn2.wqs <- wtq.stack(sminn2.reg, wt2) #RCP45_85LU
    sminn1.wq2075s <- wtq.ave(sminn1.reg, wt1, 2075, 2099)
    sminn1.wq2090s <- wtq.ave(sminn1.reg, wt1, 2090, 2099)
    sminn2.wq2075s <- wtq.ave(sminn2.reg, wt2, 2075, 2099)
    sminn2.wq2090s <- wtq.ave(sminn2.reg, wt2, 2090, 2099)
    #sminn1.wq2050s;sminn2.wq2050s;sminn3.wq2050s #RCP45 has much more fertile soil
    #sminn1.wq2090s;sminn2.wq2090s;sminn3.wq2090s
    sand1.wqs <- wtq.stack(sand.1m, wt1) #RCP45 2006:2100
    sand2.wqs <- wtq.stack(sand.1m, wt2) #RCP45_85LU
    clay1.wqs <- wtq.stack(clay.1m, wt1) #RCP45 2006:2100
    clay2.wqs <- wtq.stack(clay.1m, wt2) #RCP45_85LU

    
    
    df <- data.table(Variable=rep(c("YIELD", "TSA", "PRECIP", "RH", "FERTN","SMINN"), each=7*2),
                     Scenario=rep(rep(c("RCP45","RCP45_85LU"), each=7), time=5),
                     Probs=rep(c("1%", "5%", "25%", "50%", "75%", "95%", "99%"), time=5*2),
                     WtQ1=c(yield1.wq2075s, yield2.wq2075s, 
                            tsa1.wq2075s,tsa2.wq2075s,
                            ppt1.wq2075s,ppt2.wq2075s,
                            rh1.wq2075s,rh2.wq2075s,
                            fertn1.wq2075s,fertn2.wq2075s,
                            sminn1.wq2075s,sminn2.wq2075s),
                     WtQ2=c(yield1.wq2090s, yield2.wq2090s, 
                            tsa1.wq2090s,tsa2.wq2090s,
                            ppt1.wq2090s,ppt2.wq2090s,
                            rh1.wq2090s,rh2.wq2090s,
                            fertn1.wq2090s,fertn2.wq2090s,
                            sminn1.wq2090s,sminn2.wq2090s),
                     Region=region[reg] )
    df2 <- data.table(Year=rep(2006:2100, time=8*2*7), #8 statistics x 2 scenarios x 7 variables 
                     Variable=rep(c("YIELD", "TSA", "PRECIP", "RH", "FERTN", "SMINN", "SAND", "CLAY"), each=95*8*2),
                     Scenario=rep(rep(c("RCP45","RCP45_85LU"), each=95*8), time=7),
                     Stat=rep(rep(c("1%", "5%", "25%", "50%", "75%", "95%", "99%", "Mean"), each=95), time=7*2),
                     Value=c(cbind(yield1.wqs, yield1.wm), cbind(yield2.wqs, yield2.wm), # cbind(yield3.wqs, yield3.wm),
                             cbind(tsa1.wqs,tsa1.wm), cbind(tsa2.wqs,tsa2.wm), # cbind(tsa3.wqs, tsa3.wm), 
                             cbind(ppt1.wqs,ppt1.wm), cbind(ppt2.wqs,ppt2.wm), #cbind(ppt3.wqs, ppt3.wm), 
                             cbind(rh1.wqs,rh1.wm), cbind(rh2.wqs,rh2.wm),
                             cbind(fertn1.wqs,fertn1.wm), cbind(fertn2.wqs,fertn2.wm),
                             cbind(sminn1.wqs,sminn1.wm), cbind(sminn2.wqs,sminn2.wm),
                             cbind(sand1.wqs,sand1.wm), cbind(sand2.wqs,sand2.wm), 
                             cbind(clay1.wqs,clay1.wm), cbind(clay2.wqs,clay2.wm)),  
                     #matrix to vector: first rows and then columns=>list all values of stat 1 and then stat 2
                     Region=region[reg] )


    df3 <- data.table(Year=rep(2006:2100, time=10*2),
                      Variable=rep(c("AREA", "PROD", "YIELD", "YIELD.RAIN", "YIELD.IRRIG", 
                                     "IRRIG", "TSA", "PRECIP", "RH", "FERTN", "SMINN"), each=95*2),
                      Scenario=rep(rep(c("RCP45","RCP45_85LU"), each=95), time=10),
                      Value=c(area1.s,area2.s,prod1.s,prod2.s,yield1.wm,yield2.wm,
                              yield11.wm,yield21.wm,yield12.wm,yield22.wm,irrig1.wm,irrig2.wm,
                              tsa1.wm,tsa2.wm,ppt1.wm,ppt2.wm,rh1.wm,rh2.wm,fertn1.wm,fertn2.wm,sminn1.wm,sminn2.wm),
                      Region=region[reg] )
      
    df.reg <- rbind(df.reg, df)
    df2.reg <- rbind(df2.reg, df2)
    df3.reg <- rbind(df3.reg, df3)
    
  } #nreg 
  df.reg$Crop <- crops[n]
  df.all <- rbind(df.all, df.reg)
  df2.reg$Crop <- crops[n]
  df2.all <- rbind(df2.all, df2.reg)
  df3.reg$Crop <- crops[n]
  df3.all <- rbind(df3.all, df3.reg)
  
  rm(AREA1,AREA2,AREA11,AREA12,AREA21,AREA22,YIELD1,YIELD2,YIELD11,YIELD12,YIELD21,YIELD22,PROD1,PROD2,PROD11,PROD12,PROD21,PROD22,FERTN1,FERTN2,FERTN11,FERTN12,FERTN21,FERTN22)
  rm(wt1,wt2,wt3,area1.reg,area2.reg,yield1.reg,yield2.reg,prod1.reg,prod2.reg,fertn1.reg,fertn2.reg,area11.reg,area12.reg,area21.reg,area22.reg,yield11.reg,yield12.reg,yield21.reg,yield22.reg)
  rm(yield1.w,yield2.w, fertn1.w,fertn2.w,yield11.w,yield12.w,yield21.w,yield22.w)
  rm(tsa1, tsa2, rain1, rain2, snow1, snow2, sminn1, sminn2, sand, clay, sand.1m, clay.1m) 
  rm(tsa1.reg, tsa2.reg, ppt1.reg, ppt2.reg, rh1.reg, rh2.reg, sminn1.reg, sminn2.reg, sand.reg, clay.reg)
  rm(tsa1.w, tsa2.w, ppt1.w, ppt2.w, sminn1.w, sminn2.w, sand1.w,sand2.w, clay1.w,clay2.w)
  
}#ncft

df.all[Crop=="Corn", Crop:="Maize"]
df.all[Crop=="Soybean", Crop:="Soy"]
df2.all[Crop=="Corn", Crop:="Maize"]
df2.all[Crop=="Soybean", Crop:="Soy"]
df3.all[Crop=="Corn", Crop:="Maize"]
df3.all[Crop=="Soybean", Crop:="Soy"]



save(list = c("df.all","df2.all", "df3.all"), file =paste0(wd,"/data/RData/Fig.S5-6.LUC-Analysis-WeightedQuantiles.Rdata"))











